Result Evaluation


In [28]:
import os
import sys
import shutil
import numpy as np
import skvideo.io

import tensorflow as tf
from tensorflow.python.ops import init_ops
from tensorflow.contrib.layers.python.layers import regularizers

# Short alias for the TF-Slim namespace.
slim = tf.contrib.slim

# Clear the default graph so the ops created below start from an empty graph.
tf.reset_default_graph()


def trunc_normal(stddev):
    """Return a truncated-normal initializer with mean 0.0 and the given stddev."""
    return init_ops.truncated_normal_initializer(0.0, stddev)


/home/pratik/anaconda2/lib/python2.7/site-packages/skvideo/__init__.py:356: UserWarning: avconv/avprobe not found in path: 
  warnings.warn("avconv/avprobe not found in path: " + str(path), UserWarning)

In [64]:
#====================  COPIED CODE ===============================================
#
#  TENSORBOARD VISUALIZATION FOR SHARPNESS AND (Peak Signal to Noise Ratio){PSNR}
#=================================================================================
def log10(t):
    """
    Element-wise base-10 logarithm of a tensor.

    Uses the change-of-base identity log10(x) = ln(x) / ln(10); the constant 10
    is created with the same dtype as ln(t) so the division stays in one dtype.

    @param t: The tensor whose elements are log-transformed.
    @return: A tensor holding the base-10 log of every element of t.
    """
    natural_log = tf.log(t)
    ln_of_ten = tf.log(tf.constant(10, dtype=natural_log.dtype))
    return natural_log / ln_of_ten
    
def psnr_error(gen_frames, gt_frames):
    """
    Mean Peak Signal to Noise Ratio between generated and ground-truth frames.

    For every frame the mean squared error over axes 1, 2 and 3 is computed and
    turned into 10 * log10(1 / MSE). The numerator is the constant 1, so the
    formula corresponds to a peak signal value of 1.

    @param gen_frames: A tensor of shape [batch_size, height, width, 3]. The frames produced
                       by the generator model.
    @param gt_frames: A tensor of shape [batch_size, height, width, 3]. The ground-truth frame
                      for each frame in gen_frames.
    @return: A scalar tensor. The per-frame PSNR averaged over the batch.
    """
    frame_dims = tf.shape(gen_frames)
    pixels_per_frame = tf.to_float(frame_dims[1] * frame_dims[2] * frame_dims[3])
    squared_error = tf.square(gt_frames - gen_frames)

    # Per-frame MSE: sum over height, width and channels, scaled by 1 / pixel count.
    mean_squared_error = (1 / pixels_per_frame) * tf.reduce_sum(squared_error, [1, 2, 3])
    per_frame_psnr = 10 * log10(1 / mean_squared_error)
    return tf.reduce_mean(per_frame_psnr)

def sharp_diff_error(gen_frames, gt_frames):
    """
    Sharpness Difference between generated and ground-truth frames.

    For both inputs the absolute horizontal and vertical neighbour differences
    are computed per channel and added together. Per frame, the absolute gap
    between the two gradient maps is averaged over all values and converted to
    10 * log10(1 / mean_gap); the result is the batch mean of those scores.

    @param gen_frames: A tensor of shape [batch_size, height, width, 3]. The frames produced
                       by the generator model.
    @param gt_frames: A tensor of shape [batch_size, height, width, 3]. The ground-truth frame
                      for each frame in gen_frames.
    @return: A scalar tensor. The Sharpness Difference averaged over the batch.
    """
    frame_dims = tf.shape(gen_frames)
    pixels_per_frame = tf.to_float(frame_dims[1] * frame_dims[2] * frame_dims[3])

    # Difference kernels in conv2d layout [height, width, in_channels, out_channels].
    # The 3x3 identity maps each input channel only onto itself, so the three
    # channels are differenced independently.
    # TODO: Could this be simplified with one filter [[-1, 2], [0, -1]]?
    plus_one = tf.constant(np.identity(3), dtype=tf.float32)
    minus_one = -1 * plus_one
    kernel_dx = tf.expand_dims(tf.stack([minus_one, plus_one]), 0)  # 1x2 kernel: [-1, 1]
    kernel_dy = tf.stack([tf.expand_dims(plus_one, 0),
                          tf.expand_dims(minus_one, 0)])  # 2x1 kernel: [[1], [-1]]

    def abs_gradient(frames, kernel):
        # Magnitude of the finite difference; stride (1, 1), 'SAME' padding.
        return tf.abs(tf.nn.conv2d(frames, kernel, [1, 1, 1, 1], padding='SAME'))

    gen_gradients = abs_gradient(gen_frames, kernel_dx) + abs_gradient(gen_frames, kernel_dy)
    gt_gradients = abs_gradient(gt_frames, kernel_dx) + abs_gradient(gt_frames, kernel_dy)
    gradient_gap = tf.abs(gt_gradients - gen_gradients)

    mean_gap = (1 / pixels_per_frame) * tf.reduce_sum(gradient_gap, [1, 2, 3])
    per_frame_score = 10 * log10(1 / mean_gap)
    return tf.reduce_mean(per_frame_score)

## =================== COPIED CODE ENDS ======================

def l2_loss(generated_frames, expected_frames):
    """
    Mean of the per-scale L2 losses between generated and expected frames.

    @param generated_frames: A sequence of tensors, one per scale, holding the generated frames.
    @param expected_frames: A sequence of tensors, one per scale, holding the matching
                            ground-truth frames. Paired element-wise with generated_frames.
    @return: A scalar tensor. tf.nn.l2_loss of each (generated - expected) difference,
             averaged over the scales.
    """
    per_scale_losses = [
        tf.nn.l2_loss(tf.subtract(gen_scale, exp_scale))
        for gen_scale, exp_scale in zip(generated_frames, expected_frames)
    ]
    return tf.reduce_mean(tf.stack(per_scale_losses))

def gdl_loss(generated_frames, expected_frames, alpha=2):
    """
    Gradient Difference Loss between generated and expected frames.

    For every scale, the absolute difference of each pixel to its horizontal
    and to its vertical neighbour is computed for both inputs. The loss of a
    scale is the sum of |expected gradient - generated gradient| ** alpha over
    both directions; the returned value is the mean of the per-scale losses.

    @param generated_frames: A sequence of tensors, one per scale, each of shape
                             [batch_size, height, width, 3] (the difference filters are
                             built for exactly 3 channels).
    @param expected_frames: A sequence of ground-truth tensors paired element-wise with
                            generated_frames. Pairs are formed with zip, so extra entries
                            in the longer sequence are ignored.
    @param alpha: The exponent applied to the gradient differences.
    @return: A scalar tensor. The mean gradient difference loss over all scales.
    """
    # The difference filters do not depend on the scale, so build them once
    # instead of adding a fresh set of constant ops to the graph per scale.
    # Layout is [height, width, in_channels, out_channels]; the 3x3 identity
    # keeps the three channels independent.
    pos = tf.constant(np.identity(3), dtype=tf.float32)
    neg = -1 * pos
    filter_x = tf.expand_dims(tf.stack([neg, pos]), 0)  # [-1, 1]
    filter_y = tf.stack([tf.expand_dims(pos, 0), tf.expand_dims(neg, 0)])  # [[1],[-1]]
    strides = [1, 1, 1, 1]  # stride of (1, 1)
    padding = 'SAME'

    scale_losses = []
    for gen_scale, gt_scale in zip(generated_frames, expected_frames):
        gen_dx = tf.abs(tf.nn.conv2d(gen_scale, filter_x, strides, padding=padding))
        gen_dy = tf.abs(tf.nn.conv2d(gen_scale, filter_y, strides, padding=padding))
        gt_dx = tf.abs(tf.nn.conv2d(gt_scale, filter_x, strides, padding=padding))
        gt_dy = tf.abs(tf.nn.conv2d(gt_scale, filter_y, strides, padding=padding))

        grad_diff_x = tf.abs(gt_dx - gen_dx)
        grad_diff_y = tf.abs(gt_dy - gen_dy)

        scale_losses.append(tf.reduce_sum(grad_diff_x ** alpha + grad_diff_y ** alpha))

    # condense into one tensor and avg
    return tf.reduce_mean(tf.stack(scale_losses))

def total_loss(generated_frames, expected_frames, lambda_gdl=1.0, lambda_l2=1.0):
    """
    Weighted sum of the gradient difference loss and the L2 loss.

    @param generated_frames: A sequence of generated-frame tensors, one per scale.
    @param expected_frames: A sequence of ground-truth tensors paired with generated_frames.
    @param lambda_gdl: Weight applied to gdl_loss.
    @param lambda_l2: Weight applied to l2_loss.
    @return: A scalar tensor: lambda_gdl * gdl_loss + lambda_l2 * l2_loss.
    """
    weighted_gdl = lambda_gdl * gdl_loss(generated_frames, expected_frames)
    weighted_l2 = lambda_l2 * l2_loss(generated_frames, expected_frames)
    return weighted_gdl + weighted_l2

Calculate the metrics over the output directory


In [35]:
# Shape every evaluated clip must have: (T, H, W, C).
frame_eval = (4, 64, 64, 3)

# Base directory; left empty, so the output folder below is a relative path.
file_path = ""
output_video_save_file_path = os.path.join(file_path, "../../output/")

In [46]:
# Walk the output directory and collect the path stems (full path minus the
# suffix) of every expected / generated clip, so the two sets can be matched.
EXPECTED_SUFFIX = "_expected_large.mp4"
GENERATED_SUFFIX = "_generated_large.mp4"

expected_file_names = set()
generated_file_names = set()
for root, _, files in os.walk(output_video_save_file_path):
    for file_name in files:
        full_path = os.path.join(root, file_name)
        # Slice off only the trailing suffix. str.replace would also remove the
        # same text from anywhere else in the path, giving a stem that no
        # longer rebuilds to the real file name.
        if full_path.endswith(EXPECTED_SUFFIX):
            expected_file_names.add(full_path[:-len(EXPECTED_SUFFIX)])
        elif full_path.endswith(GENERATED_SUFFIX):
            generated_file_names.add(full_path[:-len(GENERATED_SUFFIX)])

In [47]:
# Keep only the stems that have both an expected and a generated clip.
common_files_in_gen_exp = expected_file_names & generated_file_names

In [48]:
# Report how many expected/generated pairs will be evaluated.
print ("".join(["Info : Evaluating on ", str(len(common_files_in_gen_exp)), " files."]))


Info : Evaluating on 8 files.

In [78]:
# ---- Load every generated / expected clip pair -----------------------------
gen_videos = []
exp_videos = []
for each_file in common_files_in_gen_exp:
    gen_file = each_file + "_generated_large.mp4"
    exp_file = each_file + "_expected_large.mp4"

    gen_video_data = skvideo.io.vread(gen_file)
    exp_video_data = skvideo.io.vread(exp_file)
    assert gen_video_data.shape == exp_video_data.shape == frame_eval, \
        "Unexpected clip shape for " + each_file

    gen_videos.append(gen_video_data)
    exp_videos.append(exp_video_data)

# Fail early with a clear message instead of an opaque unpacking error below.
assert gen_videos, "No generated/expected video pairs found to evaluate."

# get psnr_error and sharp_diff_error
gen_frames_np = np.array(gen_videos)
exp_frames_np = np.array(exp_videos)
# normalize ... ! (maps a value v to (v - 127.5) / 127.5)
# NOTE(review): assuming vread yields 0..255 pixel values, frames end up in
# [-1, 1], while psnr_error uses a numerator of 1 (peak value 1) -- confirm
# this is the intended PSNR convention.
gen_frames_np = (gen_frames_np - 127.5) / 127.5
exp_frames_np = (exp_frames_np - 127.5) / 127.5
B, T, H, W, C = gen_frames_np.shape

# Flatten (B, T) into one list of individual frames of shape [H, W, C].
gen_frames = list(np.reshape(gen_frames_np, [-1, H, W, C]))
exp_frames = list(np.reshape(exp_frames_np, [-1, H, W, C]))

# ---- Build all metric ops in a dedicated graph ------------------------------
# Re-running this cell would otherwise keep adding placeholders, variables and
# ops to the process-wide default graph (and re-initialise all of them).
eval_graph = tf.Graph()
with eval_graph.as_default():
    # psnr_tf, sharp_diff_tf
    tf_gen = tf.placeholder(dtype=tf.float32, shape=[None, H, W, C])
    tf_exp = tf.placeholder(dtype=tf.float32, shape=[None, H, W, C])
    psnr_tf = psnr_error(tf_gen, tf_exp)
    sharp_diff_tf = sharp_diff_error(tf_gen, tf_exp)

    # One [1, H, W, C] tensor per frame. Real lists (not map objects) so the
    # three loss functions below can each iterate over them.
    list_of_gen_tfs = [tf.Variable(np.array([frame]), dtype=tf.float32) for frame in gen_frames]
    list_of_exp_tfs = [tf.Variable(np.array([frame]), dtype=tf.float32) for frame in exp_frames]

    l2_ls = l2_loss(list_of_gen_tfs, list_of_exp_tfs)
    gd_ls = gdl_loss(list_of_gen_tfs, list_of_exp_tfs)
    tot_ls = total_loss(list_of_gen_tfs, list_of_exp_tfs)

    init = tf.global_variables_initializer()

# ---- Evaluate ---------------------------------------------------------------
with tf.Session(graph=eval_graph) as sess:
    sess.run(init)
    psnr, sharp_diff = sess.run([psnr_tf, sharp_diff_tf],
                                feed_dict={tf_gen: gen_frames, tf_exp: exp_frames})
    l2, gd, tot = sess.run([l2_ls, gd_ls, tot_ls])

print("%s %s %s" % (l2, gd, tot))


341.359 658.991 1000.35

In [77]:
# Image-quality metrics computed on the whole frame batch.
for label, value in (("psnr : ", psnr), ("sharp : ", sharp_diff)):
    print(label + str(value))

# Loss values computed frame by frame.
for label, value in (("l2_ls : ", l2), ("gd_ls : ", gd), ("tot_ls : ", tot)):
    print(label + str(value))


psnr : 13.4459
sharp : 8.9947
l2_ls : 341.359
gd_ls : 658.991
tot_ls : 1000.35

In [61]:



[array([[1, 2],
       [3, 4]], dtype=int32)]

In [51]:


In [58]:


In [ ]: